

libname mylib 'g:\Dropbox\Wall Street Bets (Private)\Data';



*This program contains the information needed to create Table 1 of the paper;


DATA DD_POSTS;
set mylib.dd_posts18_21q2;;
DD_ID = _N_;
RUN;


*will limit sample of DD posts to firms that were in this final sample (e.g., common stocks with data in CRSP/Compustat);
DATA SAMPLE;
SET MYLIB.DAILY_PANEL_rfs;
WHERE TOTAL_DD_POSTS >0;
*limit from July 2018 to June 2021 (cum_month1 =  January 2018);
IF 7<=CUM_MONTH<=42;
pre_gme = .;
post_gme = .;
day = day(date);
month = month(date);

*define GME event day at January 13th 2021;

if cum_month <37 then pre_gme =1;
if cum_month > 37 then post_gme =1;

if cum_month = 37 and day <13 then pre_gme =1;
if cum_month = 37 and day >13 then post_gme =1;

*delete GME event day;
if cum_month = 37 and day =13 then delete;

if pre_gme = . then pre_gme =0;
if post_gme = . then post_gme =0;
RUN;





PROC SQL;
CREATE TABLE DD_POSTS2 AS
SELECT *
FROM DD_POSTS, SAMPLE
WHERE DD_POSTS.DAY0 = SAMPLE.DAY0 AND DD_POSTS.TICKER = SAMPLE.TICKER;
QUIT;




*provides summary statistics on unique firms;

PROC SORT NODUPKEY DATA=DD_POSTS2 OUT=FIRMS;
 BY TICKER;
 RUN;
 *also report summary stats by pre versus post-GME;
PROC SORT NODUPKEY DATA=DD_POSTS2 OUT=FIRMS2;
 BY TICKER post_gme;
 RUN;
 *635 firms pre-gme;
 DATA FIRMS20;
SET FIRMS2;
WHERE post_gme =0;
RUN;
*501 firms post gme;
DATA FIRMS21;
SET FIRMS2;
WHERE post_gme =1;
RUN;

*summary stats on firm days;

PROC SORT NODUPKEY DATA=DD_POSTS2 OUT=FIRM_DAYS;
 BY TICKER DAY0;
 RUN;
PROC SORT NODUPKEY DATA=DD_POSTS2 OUT=FIRMS_DAYS2;
 BY TICKER DAY0 post_gme;
 RUN;
 DATA FIRM_DAYS20;
SET FIRMS_DAYS2;
WHERE post_gme =0;
RUN;
DATA FIRM_DAYS21;
SET FIRMS_DAYS2;
WHERE post_gme =1;
RUN;


*summary stats on other variables;

PROC MEANS DATA=DD_POSTS2;
VAR PURCHASE COMMENTS1  GME_AMC_FLAG ;
RUN;

proc sort data=DD_POSTS2;
by post_gme;
run;

PROC MEANS DATA=DD_POSTS2;
VAR PURCHASE COMMENTS1  GME_AMC_FLAG ;
by post_gme;
RUN;



*SUMMARY STATS ON NUMBER OF POSTS PER AUTHOR;
proc sort data=DD_POSTS2;
by author;
run;
proc univariate noprint data=DD_POSTS2;
by author;
var dd_id;
output out=posts_per_author n=posts_per_author;
run;
proc means data=posts_per_author;
var posts_per_author;
run;


proc sort data=DD_POSTS2;
by POST_GME author ;
run;
proc univariate noprint data=DD_POSTS2;
by POST_GME author ;
var dd_id;
output out=posts_per_author n=posts_per_author;
run;
proc means data=posts_per_author;
BY POST_GME;
var posts_per_author;
run;


*now examine non-research posts (Panel B of Table 1);

*limit sample to time period that corresponds to DD post (72,636 posts);

Data posts;
set mylib.NON_RESEARCH_POSTS;
hour = hour(created_utc);
minute = minute(created_utc);
if hour > 16 then next_day =1;
else next_day =0;
post_date = datepart(created_utc);

adj_date =  post_date + next_day;
weekday = weekday(adj_date);
tradedate = adj_date;
if weekday = 7 then tradedate = adj_date +2;
if weekday = 1 then tradedate = adj_date +1;

format post_date yymmdd10.;
format tradedate yymmdd10.;

year = year(tradedate);
month = month(tradedate);
day = day(tradedate);


cum_month = (year -2018)* 12 + month;

IF 7<=CUM_MONTH<=42;
comments  = num_comments * 1;
run;


proc sort data=posts;
by author flair definite_ticker tradedate descending   comments;
run;
proc means data=posts;
var comments;
run;

*delete duplicate posts by same author and date - 56503;
proc sort nodupkey data=posts out=posts2;
by author flair definite_ticker tradedate;
run;


*focus on subset of posts that are very clearly non-research related (N = 20,269);




data posts2;
set posts2;

if flair = 'Gain' then keep =1;
if flair = 'Loss' then keep =1;
if flair = 'Meme' then keep =1;
if flair = 'Shitpost' then keep =1;
if flair = 'News' then keep =1;

if keep =1;

keep author flair definite_ticker tradedate comments cum_month;
run;




*require that firm can be matched to daily_panel_sample (CRSP - common stocks);


data sample;
set mylib.daily_panel_rfs;
keep ticker permno  DATE;
run;



*13,341 posts;




proc sql;
create table posts3 as
select *
from posts2, sample
where posts2.definite_ticker = sample.ticker and posts2.tradedate =  sample.date;
quit;

*13,255 posts;

data posts3;
set posts3;
day = day(date);
month = month(date);

if cum_month <37 then pre_gme =1;
if cum_month > 37 then post_gme =1;

if cum_month = 37 and day <13 then pre_gme =1;
if cum_month = 37 and day >13 then post_gme =1;

if cum_month = 37 and day =13 then delete;

if pre_gme = . then pre_gme =0;
if post_gme = . then post_gme =0;
run;

*report number of firms for non-research posts;

PROC SORT NODUPKEY DATA=posts3 OUT=FIRMS;
 BY TICKER;
 RUN;
PROC SORT NODUPKEY DATA=posts3 OUT=FIRMS2;
 BY TICKER post_gme;
 RUN;
DATA FIRMS20;
SET FIRMS2;
WHERE post_gme =0;
RUN;
DATA FIRMS21;
SET FIRMS2;
WHERE post_gme =1;
RUN;


*report firm-days;
PROC SORT NODUPKEY DATA=posts3 OUT=FIRMS_DAYS2;
 BY TICKER date post_gme;
 RUN;
DATA FIRM_DAYS20;
SET FIRMS_DAYS2;
WHERE post_gme =0;
RUN;
DATA FIRM_DAYS21;
SET FIRMS_DAYS2;
WHERE post_gme =1;
RUN;

*report other summary stats;
data posts3;
set posts3;
if ticker = 'GME' then GME_AMC_FLAG =1;
if ticker = 'AMC' then GME_AMC_FLAG =1;
IF GME_AMC_FLAG =. THEN GME_AMC_FLAG =0;
RUN;


PROC MEANS DATA=posts3;
VAR  comments GME_AMC_FLAG ;
RUN;
PROC SORT DATA=POSTS3;
BY POST_GME;
RUN;

PROC MEANS DATA=posts3;
BY POST_GME;
VAR  comments GME_AMC_FLAG ;
RUN;



*LOOK AT POSTS PER CONTRIBUTOR;


proc sort data=posts3;
by author;
run;
proc univariate noprint data=posts3;
by author;
var COMMENTS;
output out=posts_per_author n=posts_per_author;
run;
PROC UNIVARIATE DATA=posts_per_author;
VAR posts_per_author;
RUN;
proc univariate noprint data=posts3;
by author;
var COMMENTS;
output out=posts_per_author n=posts_per_author;
WHERE POST_GME =1;
run;
PROC UNIVARIATE DATA=posts_per_author;
VAR posts_per_author;
RUN;



*********************************************************************************
*finally will bring in Seeking Alpha Data - Panel C of Table 1 

*********************************************************************************;



data sa;
set mylib.seeking_alpha2b;
*will limit sample to same period as WSB;
if year <2018 then delete;
if year = 2018 and month <6 then delete;
run;






data sa;
set sa;
if rating = '' then delete;
if rating = 'neutral' then buy =0;;
if rating = 'bullish' then buy =1;
if rating = 'bearish' then buy =0;

if rating = 'very_bu' then buy =1;

if rating = 'very_be' then buy =0;
run;

*if the post occurs after the close of trading then will convert the day to the next day (i.e. the first day in which an investor could have traded the stock);

*all analysis will assume that investor buys at the close of trading on the first day that an investor could have bought the stock;



*if the post occurs on a weekend will convert to a monday;

data sa2;
set sa;
format date MMDDYY10.;
format tradedate MMDDYY10.;

date  = mdy (month, day, year);
weekday = weekday(date);

*if the trade occurs after trading hours, will add one day to the date;

tradedate = date;

if hour >=16 then tradedate = date +1;


*will then compute the first day of the week that you could trade;
*if day of the week is on weekend will convert to monday;

weekday = weekday(tradedate);

if weekday = 1 then tradedate = tradedate +1;
if weekday = 7 then tradedate = tradedate +2;


run;


data sample;
set mylib.daily_panel_rfs;
keep ticker permno  DATE gme_amc_flag post_gme;
run;


proc sql;
create table sa3 as
select *
from sa2, sample
where sa2.ticker = sample.ticker and sa2.tradedate =  sample.date;
quit;
*summary stats;
proc means data=sa3;
var buy article_comments_count gme_amc_flag;
run;
 proc sort data=sa3;
by post_gme;
run;
proc means data=sa3;
var buy article_comments_count gme_amc_flag;
by post_gme;
run;

proc sort nodupkey data=sa3 out=firm_days;
by ticker tradedate;
run;
proc sort nodupkey data=sa3 out=firms;;
by ticker ;
run;

proc sort data=sa3;
by author_id;
run;
proc univariate noprint data=sa3;
by author_id;
var buy;
output out=posts_per_author n= posts_per_author;
run;
proc univariate data=posts_per_author;
var posts_per_author;
run;
